The live app is running at https://udisc.mattlichti.com/

All of the data was submitted through the UDisc Android and iPhone apps: https://udisc.com/

The code is based on lessons 1 and 2 of the fast.ai course (v3): https://course.fast.ai/

%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.vision import *
from fastai.metrics import error_rate
import urllib.request as req
import pandas as pd
import os
import numpy as np

Step 1: Cleaning the Data and loading it into the fastai ImageDataBunch class

# Load the disc metadata table, using the first CSV column as the index.
df = pd.read_csv('transformed_97_discs.csv', index_col=0)
# Keep only rows flagged as downloaded.
df = df[df['downloaded']]
# Drop rows whose `size` is below 25000 (presumably the image file size
# in bytes -- confirm against the script that produced the CSV).
df = df[df['size'].ge(25000)]
df.shape
(63815, 13)
# Seed NumPy's global RNG first so the random subset drawn below is repeatable.
np.random.seed(42)
# Restrict the experiment to three disc molds, then draw 3000 random rows.
molds = ['Leopard', 'Buzzz', 'Firebird']
df = df.loc[df['discName'].isin(molds)].sample(3000)
# Per-mold row counts for the sampled subset.
df.groupby('discName').count()
manufacturerName image plastic created type speed size downloaded disc_label plastic_label folder path
discName
Buzzz 1168 1168 967 1168 1158 839 1168 1168 1168 1168 1168 1168
Firebird 786 786 755 786 778 715 786 786 786 786 786 786
Leopard 1046 1046 834 1046 1040 1006 1046 1046 1046 1046 1046 1046
# Reduce the table to a path -> disc_label mapping (image path as the index)
# and write it out as the labels file for the image folder.
df = df.set_index('path')[['disc_label']]
df.to_csv('disc_images/labels.csv')
# Seed NumPy before building the data bunch (presumably so the random
# 10% validation split is repeatable -- confirm against the fastai docs).
np.random.seed(42)
path = Path('/home/jupyter/disc_classifier/disc_images')
# Augmentations with flipping disabled.
tfms = get_transforms(do_flip=False)
data = ImageDataBunch.from_csv(path, valid_pct=.1, ds_tfms=tfms,
                               size=224, num_workers=4)
# Normalize with ImageNet statistics to match the pretrained network used later.
data = data.normalize(imagenet_stats)
data.classes, len(data.train_ds), len(data.valid_ds)
(['Discraft Buzzz', 'Innova Firebird', 'Innova Leopard'], 2700, 300)
# Display a sample batch of images as a visual sanity check.
data.show_batch()

Step 2: Training the convolutional neural net. We use a 50-layer ResNet that has been pretrained on ImageNet: https://en.wikipedia.org/wiki/Residual_neural_network

# Build a learner around a ResNet-50 and report error rate as the metric.
learn = cnn_learner(data, models.resnet50, metrics=error_rate)
# Run the learning-rate finder and plot its result to choose max_lr for training.
learn.lr_find()
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
# First training stage: 4 epochs with a peak learning rate of 3e-3.
learn.fit_one_cycle(4, max_lr=3*1e-3)
Total time: 03:27

epoch train_loss valid_loss error_rate time
0 0.801391 0.698489 0.223333 00:53
1 0.523101 0.402900 0.143333 00:51
2 0.348445 0.353320 0.160000 00:51
3 0.239959 0.340474 0.140000 00:51
# Checkpoint the weights after the first training stage.
learn.save('3_discs-1-4')

Step 3: Interpreting the Results

# Build an interpretation object from the trained learner to inspect its errors.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(5,5))
# Plot the 16 highest-loss samples, with the heatmap overlay turned off.
interp.plot_top_losses(16, figsize=(25,25), heatmap=False)

Step 4 (optional): Train more layers of the neural net to improve performance

# Unfreeze the network so more of its layers are trained, then rerun the
# learning-rate finder to choose rates for the fine-tuning stage.
learn.unfreeze()
learn.lr_find()
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
# Second training stage: 3 epochs with learning rates spanning 5e-5 to 3e-4
# (presumably the slice spreads these across layer groups -- confirm in the fastai docs).
learn.fit_one_cycle(3, max_lr=slice(5*1e-5,3*1e-4))
Total time: 02:34

epoch train_loss valid_loss error_rate time
0 0.179046 0.387953 0.136667 00:52
1 0.142624 0.331104 0.103333 00:51
2 0.090960 0.257891 0.096667 00:50
# Checkpoint the fine-tuned weights, then redraw the confusion matrix
# to compare against the first training stage.
learn.save('3_discs-2-3')
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(5,5))

Step 5: Putting the model into production

# Run inference on the CPU.
defaults.device = torch.device('cpu')
path = Path('/home/jupyter/disc_classifier/disc_images')
# Rebuild the data bunch so the learner knows the class list and preprocessing.
data = ImageDataBunch.from_csv(path, valid_pct=.1,
        ds_tfms=get_transforms(flip_vert=True), size=224, num_workers=4).normalize(imagenet_stats)
learn = cnn_learner(data, models.resnet50, metrics=error_rate)
# A freshly created learner does not contain the weights trained above.
# Restore the checkpoint saved after the second training stage so that
# predict() and export() use the fine-tuned model.
learn.load('3_discs-2-3')
 
# Sample images live in the project root, one level above the training images.
path = Path('/home/jupyter/disc_classifier/')
# Load the sample image once (the original loaded the same file twice).
leopard = open_image(path/'leopard.jpeg')
leopard
# Classify the sample image; predict returns the predicted class, its index,
# and the per-class outputs.
pred_class,pred_idx,outputs = learn.predict(leopard)
pred_class
Category Innova Leopard
# Per-class outputs for the sample image, one value per entry of data.classes.
outputs
tensor([1.3064e-05, 7.1869e-04, 9.9927e-01])
# Class names, for reading the outputs above.
data.classes
['Discraft Buzzz', 'Innova Firebird', 'Innova Leopard']
# Export the learner to '3discs.pkl' for use outside this notebook.
learn.export('3discs.pkl')
def predict_disc(learn, img, *, confident_above=.75, min_prob=.00005, top_n=10):
    """Classify a disc image and return a human-readable summary.

    Args:
        learn: Learner exposing ``predict(img)``, which returns
            ``(pred_class, pred_idx, outputs)``, and ``data.classes``,
            the class names in the same order as ``outputs``.
        img: Image object accepted by ``learn.predict``.
        confident_above: The top probability must exceed this value for a
            definite answer to be given.
        min_prob: Classes whose probability is not above this value are
            left out of the listing.
        top_n: Maximum number of classes to list.

    Returns:
        A multi-line string: a headline (either the predicted disc or an
        "aren't sure" message) followed by the most likely classes with
        their probabilities as percentages, one per line.
    """
    pred_class, _, outputs = learn.predict(img)
    # Convert once to plain floats so tensors, arrays and lists all work.
    probs = [float(p) for p in outputs]
    # Take class names from the learner itself rather than a notebook-global
    # `data`, so the function also works with a learner loaded elsewhere.
    classes = learn.data.classes
    # Class indices ordered from most to least probable (computed once).
    ranked = sorted(range(len(probs)), key=probs.__getitem__, reverse=True)

    if ranked and probs[ranked[0]] > confident_above:
        headline = 'We think your disc is: ' + str(pred_class)
    else:
        headline = "Sorry, we aren't sure what kind of disc that is."

    lines = [headline, 'Top disc mold probabilities for your disc: ']
    lines.extend(
        f'{classes[i]}: {round(probs[i] * 100, 2)}%'
        for i in ranked[:top_n]
        if probs[i] > min_prob
    )
    # Every line, including the last, ends with a newline.
    return '\n'.join(lines) + '\n'
# Print the formatted prediction for the Leopard sample image.
print(predict_disc(learn, leopard))
We think your disc is: Innova Leopard
Top disc mold probabilities for your disc: 
Innova Leopard: 99.93%
Innova Firebird: 0.07%

# Repeat the check with a Firebird sample image.
firebird = open_image(path/'firebird.jpg')
firebird
# NOTE(review): the recorded output for this call labels the Firebird image
# as an Innova Leopard, so this example shows a misclassification.
print(predict_disc(learn, firebird))
We think your disc is: Innova Leopard
Top disc mold probabilities for your disc: 
Innova Leopard: 97.34%
Innova Firebird: 1.98%
Discraft Buzzz: 0.68%